To-do:
%matplotlib inline
import pandas as pd
#import warnings
#warnings.filterwarnings("ignore")
# our vast librairy
import vast as vst
from pydub import AudioSegment
import os
import re
import numpy as np
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
import statistics
import librosa
import heapq
#visualization
from ipywidgets import IntProgress
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns
# Base URL of the raw data hosted on GitHub.
path = "https://github.com/zhufangda/Telecom_Paristech-3rd_year/raw/master/"\
    +"DATA920_Visualization/2018%20Mini-Challenge%201/"
# vectorize the map
map_contours = vst.vectorize("LekagulRoadways2018.png")
# NOTE(review): duplicate of the assignment above — harmless but redundant.
path = "https://github.com/zhufangda/Telecom_Paristech-3rd_year/raw/master/"\
    +"DATA920_Visualization/2018%20Mini-Challenge%201/"
# read the file (column 5 holds the record date, day first)
data = pd.read_csv(path + "AllBirdsv4.csv", parse_dates=[5], dayfirst=True)
# Categorical view of the species; i_bp is the Blue Pipit's category index.
cat_bird = pd.Categorical(data['English_name'],ordered=False)
i_bp = cat_bird.categories.tolist().index('Rose-crested Blue Pipit')
nb_categories = len(cat_bird.categories.unique())
# Add a column with 1 for the Blue pipits else 0
data['IsRCBP'] = data['English_name'].apply(lambda x: 1 \
    if x=="Rose-crested Blue Pipit"\
    else 0)
# clean vocalization
# all in lower string
data['Vocalization_type'] = data['Vocalization_type'].apply(str.lower)
# delete spaces
data['Vocalization_type'] = data['Vocalization_type'].apply(str.strip)
# clean the grids
# returns -1 if not possible to select a number beetween 0 and 200
data['X'] = data['X'].apply(vst.clean_grid)
data['Y'] = data['Y'].apply(vst.clean_grid)
# Kasios test records; strip the stray leading spaces from the column names.
kasios_records = pd.read_csv(path + "Test_Birds_Location.csv")
kasios_records = kasios_records.rename(index=str, columns={" X": "X", " Y":"Y"})
def plotmap(list_kasios_bp=None):
    """Plot every bird record location on the vectorized park map.

    Blue Pipit records are drawn in blue, all other species in green, and
    each Kasios test record as a numbered disc: blue when its index is in
    ``list_kasios_bp``, orange otherwise.

    Parameters
    ----------
    list_kasios_bp : list of int, optional
        0-based indices of Kasios records believed to be Blue Pipits.
    """
    # Avoid the shared-mutable-default pitfall (original used ``=[]``).
    if list_kasios_bp is None:
        list_kasios_bp = []
    fig, ax = plt.subplots(figsize=(10, 10))
    # print the map
    vst.print_map(ax, map_contours, "All record locations")
    # plot Blue pipits
    ax.scatter(data.loc[data['IsRCBP'] == 1]['X'],
               data.loc[data['IsRCBP'] == 1]['Y'],
               color='blue', alpha=0.7, label="Blue Pipit location")
    # plot others
    ax.scatter(data.loc[data['IsRCBP'] == 0]['X'],
               data.loc[data['IsRCBP'] == 0]['Y'],
               color='green', alpha=0.2, label="Other birds location")
    # Add Kasios records with ID (the two original branches only differed
    # in their colours — merged into one call).
    for i, txt in enumerate(kasios_records['ID'].values):
        is_bp = i in list_kasios_bp
        ax.text(kasios_records['X'].values[i], kasios_records['Y'].values[i],
                txt, color='white' if is_bp else 'black',
                ha="center", va="center",
                bbox={'pad': 0.4, 'boxstyle': 'circle', 'edgecolor': 'none',
                      'facecolor': 'blue' if is_bp else 'orange'})
    # Empty scatter only to create a legend entry for the Kasios markers.
    ax.scatter([], [], color='orange', marker='o', s=100, label='Kasios records')
    ax.legend(bbox_to_anchor=(1, 1), labelspacing=1)
    plt.show()
# Draw the default map (no Kasios record flagged as Blue Pipit yet).
plotmap()
%matplotlib inline
from ipywidgets import interactive
import matplotlib.pyplot as plt
import numpy as np
def f(m, b):
    """Draw the line y = m*x + b over [-10, 10] (ipywidgets slider demo)."""
    plt.figure(2)
    xs = np.linspace(-10, 10, num=1000)
    ys = m * xs + b
    plt.plot(xs, ys)
    plt.ylim(-5, 5)
    plt.show()
# Wire the demo function to two sliders and display the widget.
interactive_plot = interactive(f, m=(-2.0, 2.0), b=(-3, 3, 0.5))
output = interactive_plot.children[-1]
output.layout.height = '350px'
interactive_plot
# Qualitative palette: one colour per bird species.
colors = ['#a6cee3','#009432','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f',
    '#ff7f00','#cab2d6','#6a3d9a','#ffff99','#b15928','#004d40','#7B1FA2',
    '#7C4DFF','#795548','#0652DD','#B53471','#FF9800','#8BC34A','#CDDC39',
    '#b71c1c','#FFC107','#607D8B']
fig, ax = plt.subplots(figsize=(10,10))
vst.print_map(ax, map_contours, "All bird record locations")
elements = []
# One scatter layer per species, coloured from the palette above.
for i, categ in enumerate(cat_bird.categories.tolist()):
    X = data.loc[data['English_name'] == categ]['X'].tolist()
    Y = data.loc[data['English_name'] == categ]['Y'].tolist()
    element = ax.scatter(X,Y, color=colors[i], alpha=1, marker='*', zorder=5)
    elements.append([element])
labels = cat_bird.categories.tolist()
#ax.legend(bbox_to_anchor=(1, 1), labelspacing=1)
len(data)
# delete all useless data : keep only good quality, call and songs
# Quality is a letter grade, so 'A'/'B'/'C' pass the lexicographic <= 'C'.
data_sounds = data.loc[data['Quality'] <= 'C']
# BUG FIX: filter with a mask built from the already-filtered frame.  The
# original built the boolean mask from ``data``, which modern pandas rejects
# as an unalignable boolean Series.
data_sounds = data_sounds.loc[data_sounds.Vocalization_type.isin(['song' , 'call','call, song'])]
len(data_sounds)
# Categorical view restricted to the kept records.
cat_bird2 = pd.Categorical(data_sounds['English_name'],ordered=False)
def plot_new_distribub():
    """Bar chart of record counts per species after the quality/vocalization filter."""
    fig = plt.subplots(figsize=(12,5))
    ax = cat_bird2.value_counts().plot(kind='bar', color='#009432',
        label='Other birds', zorder = 2)
    # NOTE(review): hard-coded bar index 16 — presumably the Blue Pipit's
    # position in the category order; verify it matches i_bp.
    ax.get_children()[16].set_color('#0652DD')
    ax.get_children()[16].set_label('Blue Pipits')
    plt.xticks(rotation=30, ha='right')
    plt.ylabel("# of records", fontsize=12)
    ax.legend()
    ax.grid(which='major', axis='y', linestyle='--', zorder=1)
    plt.title("Distribution of the records by bird category while selecting call, songs and ABC quality", fontsize=16)
    plt.show()
# Show the filtered distribution.
plot_new_distribub()
#transform mp3 into wav
sound = AudioSegment.from_mp3("Sounds/test3.mp3")
sound = sound.set_frame_rate(44100)  # resample to 44.1 kHz
sound = sound.set_channels(1)        # force mono
sound.export("Sounds/test.wav", format="wav")
# open the wav file
rate, samples = wavfile.read("Sounds/test.wav")
print(rate) ## nb frames per second
print(samples.shape)
n_samples = len(samples)
# if stereo, keep only one channel
if samples.ndim == 2:
    samples = samples[:,0]
def log_specgram(audio, sample_rate, window_size=20, step_size=10, eps=1e-10):
    """Compute a log-magnitude spectrogram of an audio signal.

    Parameters
    ----------
    audio : 1-D array of samples.
    sample_rate : sampling rate in Hz.
    window_size : analysis window length in milliseconds.
    step_size : overlap between consecutive windows, in milliseconds.
    eps : small constant that avoids log10(0).

    Returns
    -------
    freqs, times, spec : frequency axis, time axis, and the spectrogram
    in dB with shape (n_times, n_freqs).
    """
    # Convert milliseconds to numbers of samples.
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    freqs, times, spec = signal.spectrogram(audio,
                                            fs=sample_rate,
                                            window='hann',
                                            nperseg=nperseg,
                                            noverlap=noverlap,
                                            detrend=False)
    # Transpose so time is the first axis, then move to the dB scale.
    return freqs, times, 10 * np.log10(spec.T.astype(np.float32) + eps)
# Spectrogram of the full test recording.
freqs, times, spectrogram = log_specgram(samples, rate)
def plot_magnitude_spectrogram(samples, rate, freqs, times, spectrogram):
    """Show the raw waveform (top panel) and its spectrogram (bottom panel)."""
    time_axis = np.linspace(0, len(samples) / rate, len(samples))
    fig = plt.figure(figsize=(14, 8))
    wave_ax = fig.add_subplot(211)
    wave_ax.set_title('Raw wave')
    wave_ax.set_ylabel('Magnitude')
    wave_ax.set_xlim(times.min(), times.max())
    wave_ax.plot(time_axis, samples, color='#ff7f00', linewidth=0.05)
    spec_ax = fig.add_subplot(212)
    spec_ax.imshow(spectrogram.T, aspect='auto', origin='lower',
                   extent=[times.min(), times.max(), freqs.min(), freqs.max()],
                   cmap='autumn_r')
    spec_ax.set_title('Spectrogram ')
    spec_ax.set_ylabel('Freqs in Hz')
    spec_ax.set_xlabel('Seconds')
    plt.show()
# Visualize the raw test recording.
plot_magnitude_spectrogram(samples, rate, freqs, times, spectrogram)
def custom_fft(y, fs):
    """Calculate the one-sided FFT of the samples ``y`` sampled at ``fs`` Hz.

    Returns
    -------
    xf : frequencies from 0 to fs/2 (N//2 points).
    power : power spectrum in dB at those frequencies.
    """
    T = 1.0 / fs
    N = y.shape[0]
    yf = fft(y)
    xf = np.linspace(0.0, 1.0/(2.0*T), N//2)
    # The FFT of a real signal is symmetric, so we keep only the first half;
    # the coefficients are complex, so we take their magnitude with np.abs.
    power = 10 * np.log10(2.0/N * np.abs(yf[0:N//2]))
    return xf, power
def plot_fft(fourier, power):
    """Plot the power spectrum (dB) against frequency in kHz."""
    plt.figure(figsize=(14,6))
    freq_khz = fourier / 1000
    plt.plot(freq_khz, power, color='#ff7f00', linewidth=0.05)
    plt.xlabel('Frequency (kHz)', fontsize=16)
    plt.ylabel('Power (dB)', fontsize=16)
    plt.show()
# FFT of the raw test recording.
fourier, power = custom_fft(samples, rate)
plot_fft(fourier, power)
Following the conclusions of T. Papadopoulos, S. J. Roberts and K. Willis, *Automated bird sound recognition in realistic settings*, Sep. 2018, we keep only the most energetic samples of each recording.
# mean of the 1% most energetic samples
mean_high_NRJ_samples = np.mean(heapq.nlargest(n_samples//100, np.abs(samples)))
# select only samples of that recording that have power of at least 0.25 of the estimated highest level
samples = np.array([sample for i_sample, sample in enumerate(samples) if np.abs(sample) > 0.25 * mean_high_NRJ_samples])
# update nb frames
n_samples = len(samples)
# Recompute spectrogram and FFT on the denoised signal and re-plot.
freqs, times, spectrogram = log_specgram(samples, rate)
fourier, power = custom_fft(samples, rate)
plot_magnitude_spectrogram(samples, rate, freqs, times, spectrogram)
plot_fft(fourier, power)
This step is already handled by the function `log_specgram(audio, sample_rate, window_size=20, step_size=10, eps=1e-10)` defined above.
# Framing parameters (in ms) matching log_specgram's defaults.
window_size=20
step_size=10
# keep only fq between 1kHz and 10 kHz
indices = [i for i, fq in enumerate(freqs) if fq>1000 and fq<10000]
freqs_red = freqs[indices]
spectrogram_red = spectrogram[:,indices]
from sklearn.preprocessing import normalize
# L1-normalize each frame so it reads as a distribution over frequencies.
sequence_spectogram_normalized = normalize(spectrogram_red, norm="l1", axis=1)
plot_magnitude_spectrogram(samples, rate, freqs_red, times, sequence_spectogram_normalized)
def get_sequences(spectrogram, l_sequence=100, tolerance=0.3):
    """Split ``spectrogram`` into consecutive chunks of ``l_sequence`` frames.

    When the spectrogram is shorter than one full sequence, a single
    (tail-overlapping) sequence is kept only if it holds more than
    ``tolerance * l_sequence`` frames.
    """
    nb_frames = len(spectrogram)
    list_sequences = []
    nb_sequences = nb_frames // l_sequence
    if nb_sequences != 0:
        # BUG FIX: the original iterated range(nb_sequences - 1) and
        # silently dropped the last complete sequence (the later,
        # corrected definition of this function uses range(nb_sequences)).
        for j in range(nb_sequences):
            list_sequences.append(spectrogram[j * l_sequence: (j+1) * l_sequence])
    else:
        # we have less than l_sequence frames...
        # overlap the last sequence if there is sufficient information
        if (nb_frames % l_sequence > l_sequence * tolerance):
            list_sequences.append(spectrogram[-l_sequence:])
    return list_sequences
# Keep the first 100-frame sequence of the test recording for feature demos.
list_sequences = get_sequences(sequence_spectogram_normalized)
sequence_spectogram_normalized = list_sequences[0]
def plot_spectrogram(freqs, times, spectrogram):
    """Show a (frames x freqs) spectrogram chunk, frame index on the x-axis.

    (The original computed ``n_sample = len(samples)`` from a global and
    never used it — removed.)
    """
    fig = plt.figure(figsize=(14, 4))
    plt.imshow(spectrogram.T, extent=[0, 100, freqs.min(), freqs.max()],
               aspect='auto', origin='lower', cmap='autumn_r')
    plt.title('Spectrogram ')
    plt.ylabel('Freqs in Hz')
    plt.xlabel('Nb frames')
    plt.show()
# Plot the selected normalized sequence.
plot_spectrogram(freqs_red, times, sequence_spectogram_normalized)
# Create edges for the histograms from e_min to e_max
def edges(e_min, e_max, nb_bins):
    """Return nb_bins+1 evenly spaced bin edges covering [e_min, e_max]."""
    return np.arange(e_min, e_max + (e_max-e_min) / nb_bins, (e_max-e_min) / nb_bins)
f_mean_edges = edges(1000, 10000, 100)
f_std_edges = edges(1000, 4000, 50)
f_mode_edges = edges(1000, 10000, 100)
f_delta_mode_edges = edges(-2000, 2000, 50)
n_frames_sequence = min(sequence_spectogram_normalized.shape[0], 100)
# calculate mean, std, mode and delta_mode
# f_mean: expected frequency per frame (frames are L1-normalized).
f_mean = [np.sum(freqs_red * sequence_spectogram_normalized[i]) for i in range(n_frames_sequence)]
# f_std: spread of the spectrum around f_mean.
f_std = [(np.abs(np.sum(sequence_spectogram_normalized[i] * ((freqs_red - f_mean[i]) ** 2) ))) ** (0.5) for i in range(n_frames_sequence)]
# f_mode: dominant frequency per frame.
f_mode = [freqs_red[np.argmax(sequence_spectogram_normalized[i])] for i in range(n_frames_sequence)]
# f_delta_mode: mode change between consecutive frames (np.roll wraps at the end).
f_delta_mode = np.roll(f_mode, -1) - f_mode
plot_spectrogram(freqs_red, times, sequence_spectogram_normalized)
fig, axs = plt.subplots(1, 3, figsize=(14, 5))
axs[0].plot(range(len(f_mean)),f_mean)
axs[0].set_title("$f_{mean}$", fontsize=16)
axs[1].plot(range(len(f_std)),f_std)
axs[1].set_title("$f_{std}$", fontsize=16)
axs[2].plot(range(len(f_mode)),f_mode)
axs[2].set_title("$f_{mode}$", fontsize=16)
plt.show()
# create histogramms
sequence_histogram1, f_mean_edges, f_std_edges = np.histogram2d(f_mean, f_std, bins=(f_mean_edges, f_std_edges))
sequence_histogram1 = sequence_histogram1.T
sequence_histogram2, f_mode_edges = np.histogram(f_mode, bins=f_mode_edges)
sequence_histogram3, f_mode_edges, f_delta_mode_edges = np.histogram2d(f_mode, f_delta_mode, bins=(f_mode_edges, f_delta_mode_edges))
sequence_histogram3 = sequence_histogram3.T
def plot_features(category, sequence, freq_red,
                  f_mean_edges=edges(1000, 10000, 100),
                  f_std_edges=edges(1000, 4000, 50),
                  f_mode_edges=edges(1000, 10000, 100),
                  f_delta_mode_edges=edges(-2000, 2000, 50),
                  n_frames_per_sequence=100):
    """Compute and plot the three histogram features of one normalized sequence.

    The three panels are: 2-D histogram of (f_mean, f_std), 1-D histogram of
    f_mode, and 2-D histogram of (f_mode, f_delta_mode).
    """
    l_sequence = len(sequence)
    if l_sequence < n_frames_per_sequence:
        n_frames_per_sequence = l_sequence
    # BUG FIX: use the ``freq_red`` parameter (the original read the global
    # ``freqs_red``) and ``n_frames_per_sequence`` for f_mode (the original
    # read the global ``n_frames_sequence``).
    f_mean = [np.sum(freq_red * sequence[i]) for i in range(n_frames_per_sequence)]
    f_std = [np.abs(np.sum(sequence[i] * (freq_red - f_mean[i]) ** 2)) ** 0.5 for i in range(n_frames_per_sequence)]
    f_mode = [freq_red[np.argmax(sequence[i])] for i in range(n_frames_per_sequence)]
    f_delta_mode = np.roll(f_mode, -1) - f_mode
    H1, _, _ = np.histogram2d(f_mean, f_std, bins=(f_mean_edges, f_std_edges))
    H2, _ = np.histogram(f_mode, bins=f_mode_edges)
    H3, _, _ = np.histogram2d(f_mode, f_delta_mode, bins=(f_mode_edges, f_delta_mode_edges))
    fig, axs = plt.subplots(1, 3, figsize=(14, 5))
    # BUG FIX: imshow's origin must be 'lower' ('low' is rejected by
    # current matplotlib).
    axs[0].imshow(H1.T, interpolation='nearest', origin='lower',
                  extent=[f_mean_edges[0], f_mean_edges[-1], f_std_edges[0], f_std_edges[-1]])
    axs[0].set_xlabel("$f_{mean}$", fontsize=16)
    axs[0].set_ylabel("$f_{std}$", fontsize=16)
    axs[0].set_xlim(f_mean_edges[0], f_mean_edges[-1])
    axs[0].set_ylim(f_std_edges[0], f_std_edges[-1])
    axs[0].set_aspect(3)
    axs[1].stem(f_mode_edges[:-1], H2)
    axs[1].set_xlabel("$f_{mode}$", fontsize=16)
    axs[1].set_ylabel("nb of values", fontsize=12)
    axs[1].set_xlim(f_mode_edges[0], f_mode_edges[-1])
    axs[2].imshow(H3.T, interpolation='nearest', origin='lower',
                  extent=[f_mode_edges[0], f_mode_edges[-1], f_delta_mode_edges[0], f_delta_mode_edges[-1]])
    axs[2].set_xlabel("$f_{mode}$", fontsize=16)
    # Raw string avoids the invalid "\D" escape sequence.
    axs[2].set_ylabel(r"$f_{\Delta mode}$", fontsize=16)
    axs[2].set_xlim(f_mode_edges[0], f_mode_edges[-1])
    axs[2].set_ylim(f_delta_mode_edges[0], f_delta_mode_edges[-1])
    axs[2].yaxis.set_label_position("right")
    axs[2].set_aspect(2)
    plt.show()
# Show the features of the demo sequence (category index 1 is arbitrary here).
plot_features(1, sequence_spectogram_normalized, freqs_red)
# Flatten the demo histograms into 1-D feature vectors.
features1 = sequence_histogram1.flatten()
features2 = sequence_histogram2
features3 = sequence_histogram3.flatten()
def get_spectrogram(file, window_size=20, step_size=10):
    """Open a wav file and return the bird category and normalized spectrogram.

    inputs :
        str file : name of the sound file (wav), e.g. "401952.wav"
        int window_size : length of a frame in ms
        int step_size : for the overlapping (ms)
    output :
        int category : category of the bird according to the name of the file
        array spectogram_normalized : the L1-normalized spectrogram
        array freqs_red : the retained frequencies (1-10 kHz band)
    """
    # BUG FIX: the original used re.split('.wav', file), where '.' is a
    # regex wildcard; split on the literal extension instead.
    record_id = file.rsplit('.wav', 1)[0]
    bird_category = data_sounds.loc[data_sounds['File ID']==int(record_id)]['English_name'].values[0]
    category = cat_bird.categories.tolist().index(bird_category)
    rate, frames = wavfile.read("Sounds/Out/" + file)
    n_frames = len(frames)
    # if stereo, keep only one channel
    if frames.ndim == 2:
        frames = frames[:,0]
    # Step 1 : select the most energetic frames
    # mean of the 1% most energetic frames
    mean_high_NRJ_frames = np.mean(heapq.nlargest(n_frames//100, np.abs(frames)))
    # select only frames of that recording that have power of at least 0.25 of the estimated highest level
    frames = np.array([frame for frame in frames if np.abs(frame) > 0.25 * mean_high_NRJ_frames])
    # update nb frames
    n_frames = len(frames)
    # Step 2 : split into 20ms chunks and get spectrogram
    # BUG FIX: the original re-assigned window_size/step_size here, silently
    # overriding whatever the caller passed in.
    freqs, times, spectrogram = log_specgram(frames, rate, window_size=window_size, step_size=step_size)
    # Step 3 : keep only fq between 1kHz and 10 kHz
    indices = [i for i, fq in enumerate(freqs) if fq>1000 and fq<10000]
    spectrogram_red = spectrogram[:,indices]
    freqs_red = freqs[indices]
    # Step 4: normalize each frame (L1)
    spectogram_normalized = normalize(spectrogram_red, norm="l1", axis=1)
    return category, spectogram_normalized, freqs_red
def get_spectrogram_kasios(krecord_id, window_size=20, step_size=10):
    """Open a Kasios test wav file and return its normalized spectrogram.

    inputs :
        int krecord_id : Kasios record number ("<id>.wav" in Sounds_Kasios/Out)
        int window_size : length of a frame in ms
        int step_size : for the overlapping (ms)
    output :
        array spectogram_normalized : the L1-normalized spectrogram
        array freqs_red : the retained frequencies (1-10 kHz band)
    """
    rate, frames = wavfile.read("Sounds_Kasios/Out/" + str(krecord_id) + ".wav")
    n_frames = len(frames)
    # if stereo, keep only one channel
    if frames.ndim == 2:
        frames = frames[:,0]
    # Step 1 : select the most energetic frames
    # mean of the 1% most energetic frames
    mean_high_NRJ_frames = np.mean(heapq.nlargest(n_frames//100, np.abs(frames)))
    # select only frames of that recording that have power of at least 0.25 of the estimated highest level
    frames = np.array([frame for frame in frames if np.abs(frame) > 0.25 * mean_high_NRJ_frames])
    # update nb frames
    n_frames = len(frames)
    # Step 2 : split into 20ms chunks and get spectrogram
    # BUG FIX: removed the re-assignment of window_size/step_size that
    # ignored the caller's arguments.
    freqs, times, spectrogram = log_specgram(frames, rate, window_size=window_size, step_size=step_size)
    # Step 3 : keep only fq between 1kHz and 10 kHz
    indices = [i for i, fq in enumerate(freqs) if fq>1000 and fq<10000]
    spectrogram_red = spectrogram[:,indices]
    freqs_red = freqs[indices]
    # Step 4: normalize each frame (L1)
    spectogram_normalized = normalize(spectrogram_red, norm="l1", axis=1)
    return spectogram_normalized, freqs_red
def get_sequences(spectrogram, l_sequence=100, tolerance=0.3):
    """
    split the spectrogram in spectrograms of size l_sequence (default 100)
    and keep the last one if its lenght is longer than tolerance * l_sequence
    (overlap with the previous one)
    """
    total_frames = len(spectrogram)
    full_chunks, remainder = divmod(total_frames, l_sequence)
    if full_chunks:
        return [spectrogram[start:start + l_sequence]
                for start in range(0, full_chunks * l_sequence, l_sequence)]
    # fewer frames than one sequence: keep a single tail-overlapping
    # sequence only when it carries enough information
    if remainder > l_sequence * tolerance:
        return [spectrogram[-l_sequence:]]
    return []
def get_sequences_per_category(all_spectrograms, lenght_sequence=100, tolerance=0.3):
    """
    Turn per-category lists of spectrograms into per-category sequence lists.
    ---------------------------------------------------------------------------
    inputs:
        all_spectrograms : list of spectrograms listed by category
        lenght_sequence : number of frames per sequence
        tolerance : keep only sequences whose length is at least
                    tolerance * lenght_sequence
    ---------------------------------------------------------------------------
    outputs:
        all_sequences : list of sequences listed by category
    """
    # Stack every spectrogram of a category into one long spectrogram;
    # categories without any recording keep an empty placeholder.
    stacked = [[] for _ in range(nb_categories)]
    for cat in range(nb_categories):
        specs = all_spectrograms[cat]
        if len(specs) != 0:
            stacked[cat] = np.vstack(specs)
    # Then cut each stacked spectrogram into fixed-length sequences.
    return [get_sequences(stacked[cat],
                          l_sequence=lenght_sequence,
                          tolerance=tolerance)
            for cat in range(nb_categories)]
def edges(e_min, e_max, nb_bins):
    """Return evenly spaced histogram bin edges covering [e_min, e_max]."""
    step = (e_max - e_min) / nb_bins
    return np.arange(e_min, e_max + step, step)
def get_features(category, sequence, freq_red,
                 f_mean_edges=None,
                 f_std_edges=None,
                 f_mode_edges=None,
                 f_delta_mode_edges=None,
                 n_frames_per_sequence=100):
    """Build the 10101-long feature vector of one normalized sequence.

    The vector is [category] followed by the flattened 2-D histogram of
    (f_mean, f_std) (100x50), the 1-D histogram of f_mode (100) and the
    flattened 2-D histogram of (f_mode, f_delta_mode) (100x50).
    Edge arrays default to the same ranges as the module-level ``edges``
    calls (defaults are now built lazily instead of at def time).
    """
    def _edges(e_min, e_max, nb_bins):
        # Evenly spaced bin edges covering [e_min, e_max].
        step = (e_max - e_min) / nb_bins
        return np.arange(e_min, e_max + step, step)
    if f_mean_edges is None:
        f_mean_edges = _edges(1000, 10000, 100)
    if f_std_edges is None:
        f_std_edges = _edges(1000, 4000, 50)
    if f_mode_edges is None:
        f_mode_edges = _edges(1000, 10000, 100)
    if f_delta_mode_edges is None:
        f_delta_mode_edges = _edges(-2000, 2000, 50)
    sequence = np.asarray(sequence)
    l_sequence = len(sequence)
    if l_sequence < n_frames_per_sequence:
        # Pad short sequences by repeating the last frame.  (The original
        # called ``sequence.append``, which does not exist on ndarrays.)
        pad = np.repeat(sequence[-1:], n_frames_per_sequence - l_sequence, axis=0)
        sequence = np.vstack([sequence, pad])
    # BUG FIX: use the ``freq_red`` argument (the original read the global
    # ``freqs_red``) and ``n_frames_per_sequence`` for f_mode (the original
    # read the global ``n_frames_sequence``).
    f_mean = [np.sum(freq_red * sequence[i]) for i in range(n_frames_per_sequence)]
    f_std = [np.abs(np.sum(sequence[i] * (freq_red - f_mean[i]) ** 2)) ** 0.5 for i in range(n_frames_per_sequence)]
    f_mode = [freq_red[np.argmax(sequence[i])] for i in range(n_frames_per_sequence)]
    f_delta_mode = np.roll(f_mode, -1) - f_mode
    sequence_histogram1, _, _ = np.histogram2d(f_mean, f_std, bins=(f_mean_edges, f_std_edges))
    features1 = sequence_histogram1.T.flatten()
    sequence_histogram2, _ = np.histogram(f_mode, bins=f_mode_edges)
    features2 = sequence_histogram2
    sequence_histogram3, _, _ = np.histogram2d(f_mode, f_delta_mode, bins=(f_mode_edges, f_delta_mode_edges))
    features3 = sequence_histogram3.T.flatten()
    return np.concatenate([[category], features1, features2, features3])
def get_features2(category, sequence, freq_red,
                  n_frames_per_sequence=100):
    """Return [category] + per-frame f_mean, f_std, f_mode and f_delta_mode
    (length 1 + 4 * n_frames_per_sequence)."""
    sequence = np.asarray(sequence)
    l_sequence = len(sequence)
    if l_sequence < n_frames_per_sequence:
        # Pad short sequences by repeating the last frame (the original
        # called ``sequence.append``, which ndarrays do not have).
        pad = np.repeat(sequence[-1:], n_frames_per_sequence - l_sequence, axis=0)
        sequence = np.vstack([sequence, pad])
    # BUG FIX: use the ``freq_red`` parameter and ``n_frames_per_sequence``
    # (the original read the globals ``freqs_red`` / ``n_frames_sequence``).
    f_mean = [np.sum(freq_red * sequence[i]) for i in range(n_frames_per_sequence)]
    f_std = [np.abs(np.sum(sequence[i] * (freq_red - f_mean[i]) ** 2)) ** 0.5 for i in range(n_frames_per_sequence)]
    f_mode = [freq_red[np.argmax(sequence[i])] for i in range(n_frames_per_sequence)]
    f_delta_mode = np.roll(f_mode, -1) - f_mode
    return np.concatenate([[category], f_mean, f_std, f_mode, f_delta_mode])
def plot2_features(title, H1, H2, H3, freq_red,
                   f_mean_edges=edges(1000, 10000, 100),
                   f_std_edges=edges(1000, 4000, 50),
                   f_mode_edges=edges(1000, 10000, 100),
                   f_delta_mode_edges=edges(-2000, 2000, 50),
                   n_frames_per_sequence=100):
    """Plot three pre-computed feature histograms in one figure.

    H1 : 2-D histogram of (f_mean, f_std); H2 : 1-D histogram of f_mode;
    H3 : 2-D histogram of (f_mode, f_delta_mode).  ``title`` becomes the
    figure suptitle.
    """
    fig, axs = plt.subplots(1, 3, figsize=(14, 5))
    # BUG FIX: imshow's origin must be 'lower' ('low' is rejected by
    # current matplotlib).
    axs[0].imshow(H1.T, interpolation='nearest', origin='lower',
                  extent=[f_mean_edges[0], f_mean_edges[-1], f_std_edges[0], f_std_edges[-1]])
    axs[0].set_xlabel("$f_{mean}$", fontsize=16)
    axs[0].set_ylabel("$f_{std}$", fontsize=16)
    axs[0].set_xlim(f_mean_edges[0], f_mean_edges[-1])
    axs[0].set_ylim(f_std_edges[0], f_std_edges[-1])
    axs[0].set_aspect(3)
    axs[1].stem(f_mode_edges[:-1], H2)
    axs[1].set_xlabel("$f_{mode}$", fontsize=16)
    axs[1].set_ylabel("nb of values", fontsize=12)
    axs[1].set_xlim(f_mode_edges[0], f_mode_edges[-1])
    axs[2].imshow(H3.T, interpolation='nearest', origin='lower',
                  extent=[f_mode_edges[0], f_mode_edges[-1], f_delta_mode_edges[0], f_delta_mode_edges[-1]])
    axs[2].set_xlabel("$f_{mode}$", fontsize=16)
    # Raw string avoids the invalid "\D" escape sequence.
    axs[2].set_ylabel(r"$f_{\Delta mode}$", fontsize=16)
    axs[2].set_xlim(f_mode_edges[0], f_mode_edges[-1])
    axs[2].set_ylim(f_delta_mode_edges[0], f_delta_mode_edges[-1])
    axs[2].yaxis.set_label_position("right")
    axs[2].set_aspect(2)
    plt.suptitle(title, fontsize=24)
    plt.show()
def plot3_features(title, H1, H2, H3, freq_red, axs,
                   f_mean_edges=edges(1000, 10000, 100),
                   f_std_edges=edges(1000, 4000, 50),
                   f_mode_edges=edges(1000, 10000, 100),
                   f_delta_mode_edges=edges(-2000, 2000, 50),
                   n_frames_per_sequence=100):
    """Draw the three feature histograms on caller-provided axes ``axs``
    (no figure creation or show here, so several plots can be tiled)."""
    # BUG FIX: imshow's origin must be 'lower' ('low' is rejected by
    # current matplotlib).
    axs[0].imshow(H1.T, interpolation='nearest', origin='lower',
                  extent=[f_mean_edges[0], f_mean_edges[-1], f_std_edges[0], f_std_edges[-1]])
    axs[0].set_xlim(f_mean_edges[0], f_mean_edges[-1])
    axs[0].set_ylim(f_std_edges[0], f_std_edges[-1])
    axs[0].set_aspect(3)
    axs[1].stem(f_mode_edges[:-1], H2)
    axs[1].set_xlim(f_mode_edges[0], f_mode_edges[-1])
    axs[1].set_title(title, fontsize=16)
    #axs[1].set_aspect(2)
    axs[2].imshow(H3.T, interpolation='nearest', origin='lower',
                  extent=[f_mode_edges[0], f_mode_edges[-1], f_delta_mode_edges[0], f_delta_mode_edges[-1]])
    axs[2].set_xlim(f_mode_edges[0], f_mode_edges[-1])
    axs[2].set_ylim(f_delta_mode_edges[0], f_delta_mode_edges[-1])
    axs[2].yaxis.set_label_position("right")
    axs[2].set_aspect(2)
Transform every mp3 file in the In folder that corresponds to a kept record into a mono wav file (44100 Hz) in the Out folder.
# Progress bar over the (at most) len(data) files to convert.
f = IntProgress(min=0, max=len(data), description='Load files:', bar_style='success') # instantiate the bar
print("progress")
display(f) # display the bar
for file in os.listdir("Sounds/In/"):
    # The record id is the longest run of digits in the file name.
    record_id = int(max(re.findall('\d+', file), key=len))
    # if the ID is in data_sounds, we apply
    if record_id in data_sounds['File ID'].values:
        sound = AudioSegment.from_mp3("Sounds/In/"+file)
        sound = sound.set_frame_rate(44100)
        sound = sound.set_channels(1)
        sound.export("Sounds/Out/" + str(record_id) + ".wav", format="wav")
        f.value += 1
print("done !")
from multiprocessing import Pool
import multiprocessing
from tqdm import tqdm_notebook as tqdm
# One worker per CPU core for spectrogram extraction.
cores = multiprocessing.cpu_count()
pool = Pool(processes=cores)
file_list = list(os.listdir("Sounds/Out/"))
all_spectrograms = [[] for _ in range(nb_categories)]
# NOTE(review): freqs_red keeps the value from the LAST completed worker; it
# is presumably identical across files (same rate, same band) — confirm.
for category, spectrogram, freqs_red in tqdm(pool.imap_unordered(get_spectrogram, file_list), total=len(file_list)):
    all_spectrograms[category].append(spectrogram)
# retrieve sequences from spectrograms
all_sequences = get_sequences_per_category(all_spectrograms)
# create the dataset from the sequences
# 10101 columns = 1 category id + 5000 + 100 + 5000 histogram features.
dataset = pd.DataFrame(columns=range(10101))
n_rows = 0
for category in range(nb_categories):
    for j, sequence in enumerate(all_sequences[category]):
        dataset.loc[n_rows] = get_features(category, sequence, freqs_red)
        n_rows += 1
dataset.head()
# create the dataset
dataset.to_csv("dataset.csv", float_format='%.3f')
dataset2 = dataset.copy()
dataset2 = dataset2.astype(int)
dataset2.to_csv("dataset2.csv")
len(dataset)
Instead of executing the previous commands, you can simply load the pre-computed dataset.
# Reload the integer dataset and drop the CSV index column.
dataset = pd.read_csv("dataset2.csv")
dataset = dataset.drop(dataset.columns[0], axis=1)
Using the mean of each feature by species...
# Average the feature vectors per category (column '0' is the category id).
dataset_category = dataset.groupby(['0']).mean()
dataset_category.insert(0, '0', 0.0)
categories = []
features1 = []
features2 = []
features3 = []
for row in dataset_category.iterrows():
    index, data_i = row
    categories.append(index)
    # Un-flatten the stored histograms back to their 2-D shapes.
    features1.append(np.reshape(data_i.values[0:5000], (50, 100)).T)
    features2.append(data_i.values[5000:5100])
    features3.append(np.reshape(data_i.values[5100:10100], (50, 100)).T)
# Bin centres of the 1-10 kHz range, 50 Hz apart (for axis labelling).
freqs = np.array(range(1050, 10000, 50))
for i, H1, H2, H3 in zip(categories, features1, features2, features3):
    plot2_features(cat_bird.categories.tolist()[i], H1, H2, H3, freqs)
# NOTE(review): compare_ssim was removed in scikit-image 0.18; newer code
# should import skimage.metrics.structural_similarity instead.
from skimage.measure import compare_ssim
# Pairwise species similarity: mean SSIM over the three feature maps.
ssim_beetween = np.array([[np.mean([compare_ssim(features1[i], features1[j], full=True)[0],
                                    compare_ssim(features2[i], features2[j], full=True)[0],
                                    compare_ssim(features3[i], features3[j], full=True)[0]])
                           for i in range(nb_categories)]
                          for j in range(nb_categories)])
#sim2 = np.array([[np.mean([np.linalg.norm(features1[i]-features1[j], ord='nuc'),
#                           np.linalg.norm(features2[i]-features2[j]),
#                           np.linalg.norm(features3[i]-features3[j], ord='nuc')])
#                  for i in range(nb_categories)]
#                 for j in range(nb_categories)])
from mpl_toolkits.axes_grid1 import make_axes_locatable
def plot_heatmap(data, row_labels, col_labels, title="",
                 cbar_kw=None, cbarlabel="", txt=False, **kwargs):
    """
    Plot a heatmap from a numpy array and two lists of labels.
    Arguments:
        data       : A 2D numpy array of shape (N,M)
        row_labels : A list or array of length N with the labels for the rows
        col_labels : A list or array of length M with the labels for the columns
    Optional arguments:
        title      : figure title
        cbar_kw    : A dictionary with arguments to
                     :meth:`matplotlib.Figure.colorbar`.
        cbarlabel  : The label for the colorbar
        txt        : when True, write the rounded cell values inside the cells
    All other arguments are directly passed on to the imshow call.
    """
    # Avoid a mutable default argument for the colorbar options.
    if cbar_kw is None:
        cbar_kw = {}
    fig, ax = plt.subplots(figsize=(10,10))
    # Plot the heatmap
    im = ax.imshow(data, **kwargs)
    # Create colorbar in its own axis so it matches the heatmap height.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    cbar = ax.figure.colorbar(im, cax=cax, **cbar_kw)
    cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")
    # We want to show all ticks...
    ax.set_xticks(np.arange(data.shape[1]))
    ax.set_yticks(np.arange(data.shape[0]))
    # ... and label them with the respective list entries.
    ax.set_xticklabels(col_labels)
    ax.set_yticklabels(row_labels)
    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")
    # Loop over data dimensions and create text annotations.
    if txt:
        # BUG FIX: iterate rows with i and columns with j; the original
        # swapped the ranges, which breaks for non-square matrices.
        for i in range(len(row_labels)):
            for j in range(len(col_labels)):
                text = ax.text(j, i, np.round(data[i, j],2),
                               ha="center", va="center", color="black")
    # Turn spines off and create white grid.
    for edge, spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xticks(np.arange(data.shape[1]+1)-.5, minor=True)
    ax.set_yticks(np.arange(data.shape[0]+1)-.5, minor=True)
    ax.grid(which="minor", color="w", linestyle='-', linewidth=3)
    ax.tick_params(which="minor", bottom=False, left=False)
    ax.set_title(title, fontsize=30)
    fig.tight_layout()
    plt.show()
# Heatmap of inter-species SSIM similarity with values printed in the cells.
# BUG FIX: corrected the user-visible title typo "beetween" -> "between".
plot_heatmap(ssim_beetween, cat_bird.categories.tolist(), cat_bird.categories.tolist(), "Similarity between species", cmap="YlGn", cbarlabel="ssim", txt=True)
import networkx as nx
from networkx.readwrite import json_graph
import json
from IPython.display import display, HTML, Javascript
# Map node index -> species name for graph labelling.
labels = {}
for i, cat in enumerate(cat_bird.categories.tolist()):
    labels[i]=cat
def create_graph(similarity, threshold):
    """Build an undirected graph linking species whose similarity exceeds
    ``threshold``; the edge weight scales the excess similarity."""
    n_nodes = similarity.shape[0]
    graph = nx.Graph()
    graph.add_nodes_from(range(n_nodes))
    for a in range(n_nodes):
        for b in range(a + 1, n_nodes):
            if similarity[a, b] > threshold:
                graph.add_edge(a, b, weight=(similarity[a, b] - threshold) * 100)
    return graph
# Graph of species pairs with SSIM above 0.75.
G = create_graph(ssim_beetween, 0.75)
# NOTE(review): ``G.node`` was removed in networkx 2.4 — newer versions
# require ``G.nodes`` instead.
for ix in G.nodes():
    G.node[ix]['category'] = labels[ix]
    G.node[ix]['isKasios'] = 0
for ix,deg in G.degree():
    G.node[ix]['degree'] = deg
    G.node[ix]['parity'] = (1-deg%2)
    #G.node[ix]['katz'] = 0.1
for ix,katz in nx.katz_centrality(G).items():
    G.node[ix]['katz'] = katz
# create a json file
# Serialize the graph to node-link JSON for the D3 visualization below.
datajson = json_graph.node_link_data(G)
with open('graphsim.json', 'w') as f:
    json.dump(datajson, f, indent=4)
js_getResults = """<div id="d3-container"></div>
<style>
.node {stroke: #fff; stroke-width: 1.5px;}
.link {stroke: #999; stroke-opacity: .6;}
text {
font: 14px sans-serif;
pointer-events: none;
text-shadow: 0 1px 0 #fff, 1px 0 0 #fff, 0 -1px 0 #fff, -1px 0 0 #fff;
}
</style>
<script>
// We load the latest version of d3.js from the Web.
require.config({paths: {d3: "https://d3js.org/d3.v3.min"}});
require(["d3"], function(d3) {
// Parameter declaration, the height and width of our viz.
var width = 800,
height = 800;
// Colour scale for node colours.
var color = d3.scale.category10();
// We create a force-directed dynamic graph layout.
// D3 has number of layouts - refer to the documentation.
var force = d3.layout.force()
.charge(-1000)
.linkDistance(150)
.size([width, height]);
// We select the < div> we created earlier and add an <svg> container.
// SVG = Scalable Vector Graphics
var svg = d3.select("#d3-container").select("svg")
if (svg.empty()) {
svg = d3.select("#d3-container").append("svg")
.attr("width", width)
.attr("height", height);
}
// We load the JSON network file.
d3.json("graphsim.json", function(error, graph) {
// Within this block, the network has been loaded
// and stored in the 'graph' object.
force.nodes(graph.nodes)
.links(graph.links)
.start();
var link = svg.selectAll(".link")
.data(graph.links)
.enter().append("line")
.attr('stroke-width', function(d) { return d.weight; })
.attr("class", "link");
var node = svg.selectAll(".node")
.data(graph.nodes)
.enter().append("g")
.attr("class", "node")
.call(force.drag)
.on('click', connectedNodes);
node.append("circle")
.attr("r", 15) // radius
.style("fill", function(d) {return d.id==16 ?'#0652DD':'#009432';})
node.append("text")
.attr("dx", function(d){return d.isKasios==0 ? 10 : -5;})
.attr("dy", ".15em")
.text(function(d) { return d.category ;})
.attr("stroke", "black");
node.append("title")
.text(function(d) { return d.category ;});
force.on("tick", function() {
link.attr("x1", function(d) { return d.source.x; })
.attr("y1", function(d) { return d.source.y; })
.attr("x2", function(d) { return d.target.x; })
.attr("y2", function(d) { return d.target.y; });
node.attr("cx", function(d) { return d.x; })
.attr("cy", function(d) { return d.y; });
d3.selectAll("circle").attr("cx", function (d) {
return d.x;
})
.attr("cy", function (d) {
return d.y;
});
d3.selectAll("text").attr("x", function (d) {
return d.x;
})
.attr("y", function (d) {
return d.y;
});
});
var toggle = 0;
var linkedByIndex = {};
for (var i = 0; i < graph.nodes.length; i++) {
linkedByIndex[i + "," + i] = 1;
};
graph.links.forEach(function (d) {
linkedByIndex[d.source.index + "," + d.target.index] = 1;
});
function neighboring(a, b) {
return linkedByIndex[a.index + "," + b.index];
}
function connectedNodes() {
if (toggle == 0) {
var d = d3.select(this).node().__data__;
link.style("opacity", function (o) {
return d.id==o.source.index | d.index==o.target.index ? 1 : 0.8;
});
link.style("stroke-width", function (o) {
return d.index==o.source.index | d.index==o.target.index ? o.weight : 0.8;
});
node.style("opacity", function (o) {
return neighboring(d, o) | neighboring(o, d) ? 1 : 0.3;
});
//Reset the toggle.
toggle = 1;
} else {
//Restore everything back to normal
node.style("opacity", 1);
link.style("opacity", 1);
link.style("stroke-width",function(d) { return d.weight; });
toggle = 0;
}
}
});
});
</script>
"""
# Render the first D3 force-directed graph in the notebook.
display(HTML(js_getResults))
# Progress bar for the Kasios mp3 -> wav conversion (15 test records).
f = IntProgress(min=0, max=15, description='Load files:', bar_style='success')  # instantiate the bar
display(f)  # display the bar
os.makedirs("Sounds_Kasios/Out", exist_ok=True)  # make sure the output folder exists
for file in os.listdir("Sounds_Kasios/In/"):
    # The record id is the longest digit run in the filename.
    # Raw string r'\d+' avoids the invalid-escape-sequence DeprecationWarning.
    record_id = int(max(re.findall(r'\d+', file), key=len))
    # Normalize every record to mono 44.1 kHz wav so later spectrograms are comparable.
    sound = AudioSegment.from_mp3("Sounds_Kasios/In/" + file)
    sound = sound.set_frame_rate(44100)
    sound = sound.set_channels(1)
    sound.export("Sounds_Kasios/Out/" + str(record_id) + ".wav", format="wav")
    f.value += 1
# Number of Kasios test records converted above (final progress-bar value).
nb_kasios = f.value
# One list of detected sound sequences per Kasios record.
list_kasios_sequences = [[] for _ in range(nb_kasios)]
for i in range(nb_kasios):
    t_spectogram_normalized, t_freqs_red = get_spectrogram_kasios(i + 1)
    list_kasios_sequences[i].extend(get_sequences(t_spectogram_normalized))
# Build the Kasios feature dataset: one row of 10101 values per detected
# sequence (column 0 is the record id, then the 10100 features).
dataset_kasios = pd.DataFrame(columns=range(10101))
n_rows = 0
for rec_id in range(nb_kasios):  # renamed from 'id', which shadowed the builtin
    for sequence in list_kasios_sequences[rec_id]:  # enumerate() index was unused
        # NOTE(review): t_freqs_red is whatever the LAST iteration of the loop
        # above left behind — presumably identical for every record; confirm.
        dataset_kasios.loc[n_rows] = get_features(rec_id, sequence, t_freqs_red)
        n_rows += 1
dataset_kasios.to_csv("dataset_kasios.csv", float_format='%.3f')
# Reload the features from disk (values rounded to 3 decimals by the export above).
dataset_kasios = pd.read_csv("dataset_kasios.csv")
# Drop the unnamed csv index column added by to_csv/read_csv round-trip.
dataset_kasios = dataset_kasios.drop(dataset_kasios.columns[0], axis=1)
# Average all sequences of the same record: groupby moves the record id
# (column '0') into the index; the mean runs over the 10100 feature columns.
kasios_mean = dataset_kasios.groupby(['0']).mean()
# Re-insert a dummy '0' column so rows keep the 10101-wide layout expected below.
kasios_mean.insert(0, '0', 0.0)
# Split each averaged Kasios row back into its three feature groups:
# two 100x50 spectrogram-like matrices and one 100-value vector.
l_kasios_id = []
features1k = []
features2k = []
features3k = []
for rec_index, rec_row in kasios_mean.iterrows():
    vals = rec_row.values
    l_kasios_id.append(rec_index)
    features1k.append(vals[:5000].reshape(50, 100).T)
    features2k.append(vals[5000:5100])
    features3k.append(vals[5100:10100].reshape(50, 100).T)
# Frequency axis (Hz) matching the 50 Hz resolution of the reduced spectrograms.
freqs = np.arange(1050, 10000, 50)
# One summary plot per averaged Kasios record.
for rec, H1, H2, H3 in zip(l_kasios_id, features1k, features2k, features3k):
    plot2_features("Kasios record #" + str(int(rec + 1)), H1, H2, H3, freqs)
# Human-readable row labels reused by the heatmaps below.
kasios_label = ["Kasios record #" + str(int(i + 1)) for i in range(nb_kasios)]
# Mean structural similarity (SSIM) of the three feature groups between every
# Kasios record (rows) and every species' average signature (columns).
ssim_k = np.array([[np.mean([compare_ssim(features1[i], features1k[j], full=True)[0],
                             compare_ssim(features2[i], features2k[j], full=True)[0],
                             compare_ssim(features3[i], features3k[j], full=True)[0]])
                    for i in range(nb_categories)]
                   for j in range(nb_kasios)])
plot_heatmap(ssim_k, kasios_label, cat_bird.categories.tolist(),
             "Similarity between Kasios birds and species",  # fixed 'beetween' typo
             cmap="YlGn", cbarlabel="ssim")
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
# Classification dataset with all 10100 features (column '0' is the species label).
dataset3 = dataset.copy()
y = dataset3['0']
X = dataset3.drop(dataset3.columns[0], axis=1)
# 70/30 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
# Standardize features; the scaler is fit on the training set only (no leakage).
scaler1 = StandardScaler()
scaler1.fit(X_train)
X_train = scaler1.transform(X_train)
X_test = scaler1.transform(X_test)
# Apply the same scaling to the Kasios sequences before prediction.
kasios_id = dataset_kasios['0']
X_kasios = dataset_kasios.drop(dataset_kasios.columns[0], axis=1)
X_kasios = scaler1.transform(X_kasios)
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
# One-vs-rest random forest: one 100-tree forest per species, fixed seed.
random_forest = OneVsRestClassifier(RandomForestClassifier(n_estimators=100, random_state=1))
random_forest.fit(X_train, y_train)
# Mean accuracy on the held-out 30% test split.
random_forest.score(X_test, y_test)
import scikitplot as skplt
# Row-normalized confusion matrix over the test split.
skplt.metrics.plot_confusion_matrix(y_test, random_forest.predict(X_test), normalize=True, figsize=(16,9))
# Per-sequence class probabilities for all Kasios sequences.
probas = random_forest.predict_proba(X_kasios)
# Number of sequences per record, computed once
# (value_counts() was re-evaluated twice per loop iteration).
counts = kasios_id.value_counts()
i = 0
probas_kasios = []
for j in range(nb_kasios):
    # Average the sequence-level probabilities of record j
    # (rows of `probas` are grouped consecutively by record).
    probas_kasios.append(np.mean(probas[i:i + counts[j]], axis=0))
    i += counts[j]
probas_kasios = np.array(probas_kasios)
plot_heatmap(probas_kasios, kasios_label, cat_bird.categories.tolist(),
             "probability Kasios = species, 10100 features",  # fixed 'specy' typo
             cmap="YlGn", cbarlabel="probability")
# Build a tooltip string per Kasios record listing its five most probable species.
probas_kasios_labels = []
for k in range(nb_kasios):
    rounded = np.round(probas_kasios[k, :], 2)
    top5 = heapq.nlargest(5, zip(rounded, cat_bird.categories.tolist()))
    lines = ["P" + str(rank + 1) + ": " + str(p) + " - " + name + "\n"
             for rank, (p, name) in enumerate(top5)]
    probas_kasios_labels.append("".join(lines))
def create_graph2(similarity, probas, threshold1=0.75, threshold_probas=0.1):
    """Build the species/Kasios similarity graph.

    Nodes 0..n-1 are the n species; nodes n..n+p-1 are the p Kasios records.
    Species pairs more similar than `threshold1` get a type-0 edge; each Kasios
    record gets a type-1 edge to every species whose probability exceeds
    `threshold_probas`. Edge weights grow with the margin above the threshold.
    """
    p, n = probas.shape
    G = nx.Graph()
    G.add_nodes_from(range(n + p))
    # species <-> species edges
    for a in range(n):
        for b in range(a + 1, n):
            if similarity[a, b] > threshold1:
                G.add_edge(a, b, weight=(similarity[a, b] - threshold1) * 100, stype=0)
    # Kasios record <-> species edges
    for rec in range(p):
        for sp in range(n):
            if probas[rec, sp] > threshold_probas:
                G.add_edge(n + rec, sp,
                           weight=2 * np.sqrt((probas[rec, sp] - threshold_probas) * 100),
                           stype=1)
    return G
# Build the combined graph: species nodes first, then the Kasios record nodes.
G2 = create_graph2(ssim_beetween, probas_kasios)
# NOTE(review): G2.node[...] is the legacy networkx attribute API (removed in
# networkx 2.4+, where it becomes G2.nodes[...]) — pin the networkx version.
for ix in G2.nodes():
    if ix < nb_categories:
        # Species node: label and tooltip are both the species name.
        G2.node[ix]['category'] = cat_bird.categories.tolist()[ix]
        G2.node[ix]['tip'] = cat_bird.categories.tolist()[ix]
        G2.node[ix]['isKasios'] = 0
    else:
        # Kasios node: short 'K<i>' label; tooltip lists its top-5 species.
        G2.node[ix]['category'] = 'K' + str(ix - nb_categories + 1)
        G2.node[ix]['tip'] = probas_kasios_labels[ix - nb_categories]
        G2.node[ix]['isKasios'] = 1
# Store degree and its parity (1 = even) as node attributes.
for ix,deg in G2.degree():
    G2.node[ix]['degree'] = deg
    G2.node[ix]['parity'] = (1-deg%2)
#G.node[ix]['katz'] = 0.1
# Katz centrality per node, for potential styling in the D3 view.
for ix,katz in nx.katz_centrality(G2).items():
    G2.node[ix]['katz'] = katz
# Export the graph in node-link JSON form for the D3 visualization below.
datajson2 = json_graph.node_link_data(G2)
# NOTE(review): 'f' shadows the IntProgress bar defined earlier in the script.
with open('graphsim2.json', 'w') as f:
    json.dump(datajson2, f, indent=4)
# D3/JavaScript source for the second interactive graph (species + Kasios nodes).
# Fix: in colornode(), the equality checks for the highlighted Kasios nodes
# (19, 24, 29, 33) were unreachable because the broader `a > 18` test returned
# first; the specific cases are now tested before the general one.
js_getResults2 = """<div id="d3-container2"></div>
<style>
.node {stroke: #fff; stroke-width: 1.5px;}
text {
font: 12px sans-serif;
pointer-events: none;
}
</style>
<script>
// We load the latest version of d3.js from the Web.
require.config({paths: {d3: "https://d3js.org/d3.v3.min"}});
require(["d3"], function(d3) {
// Parameter declaration, the height and width of our viz.
var width = 800,
height = 800;
// We create a force-directed dynamic graph layout.
// D3 has number of layouts - refer to the documentation.
var force = d3.layout.force()
.charge(-1000)
.linkDistance(125)
.size([width, height]);
// We select the < div> we created earlier and add an <svg> container.
// SVG = Scalable Vector Graphics
var svg = d3.select("#d3-container2").select("svg")
if (svg.empty()) {
svg = d3.select("#d3-container2").append("svg")
.attr("width", width)
.attr("height", height)
}
// We load the JSON network file.
d3.json("graphsim2.json", function(error, graph) {
// Within this block, the network has been loaded
// and stored in the 'graph' object.
force.nodes(graph.nodes)
.links(graph.links)
.start();
var link = svg.selectAll(".link")
.data(graph.links)
.enter().append("line")
.attr('stroke-width', function(d) { return d.weight})
.attr("class", "link")
.style("stroke", function(d) { return d.stype==0 ? '#3e2723': '#212121'});
var node = svg.selectAll(".node")
.data(graph.nodes)
.enter().append("g")
.attr("class", "node")
.call(force.drag)
.on('click', connectedNodes);
node.append("circle")
.attr("r", 15) // radius
.style("fill", function(d) {return colornode(d.id)})
node.append("text")
.attr("dx", function(d){return d.isKasios==0 ? 10 : -7;})
.attr("dy", ".15em")
.text(function(d) { return d.category ;})
.attr("stroke", "black");
node.append("title")
.text(function(d) { return d.tip ;});
force.on("tick", function() {
link.attr("x1", function(d) { return d.source.x; })
.attr("y1", function(d) { return d.source.y; })
.attr("x2", function(d) { return d.target.x; })
.attr("y2", function(d) { return d.target.y; });
node.attr("cx", function(d) { return d.x; })
.attr("cy", function(d) { return d.y; });
d3.selectAll("circle").attr("cx", function (d) {
return d.x;
})
.attr("cy", function (d) {
return d.y;
});
d3.selectAll("text").attr("x", function (d) {
return d.x;
})
.attr("y", function (d) {
return d.y;
});
});
function colornode(a) {
if (a == 16)
{return '#0652DD';}
if (a == 19)
{return '#FF9800';}
if (a == 24)
{return '#FF9800';}
if (a == 29)
{return '#FF9800';}
if (a == 33)
{return '#FF9800';}
if (a > 18)
{return '#FFC107';}
else
{return '#009432';}
}
var toggle = 0;
var linkedByIndex = {};
for (var i = 0; i < graph.nodes.length; i++) {
linkedByIndex[i + "," + i] = 1;
};
graph.links.forEach(function (d) {
linkedByIndex[d.source.index + "," + d.target.index] = 1;
});
function neighboring(a, b) {
return linkedByIndex[a.index + "," + b.index];
}
function connectedNodes() {
if (toggle == 0) {
var d = d3.select(this).node().__data__;
link.style("opacity", function (o) {
return d.id==o.source.index | d.index==o.target.index ? 1 : 0.8;
});
link.style("stroke-width", function (o) {
return d.index==o.source.index | d.index==o.target.index ? o.weight : 0.8;
});
node.style("opacity", function (o) {
return neighboring(d, o) | neighboring(o, d) ? 1 : 0.3;
});
//Reset the toggle.
toggle = 1;
} else {
//Restore everything back to normal
node.style("opacity", 1);
link.style("opacity", 1);
link.style("stroke-width",function(d) { return d.weight; });
toggle = 0;
}
}
});
});
</script>
"""
# Show the second interactive graph.
display(HTML(js_getResults2))
# Kasios records with a non-negligible (> 0.1) Blue Pipit probability.
# Uses i_bp (the Rose-crested Blue Pipit column index computed at the top of
# the file) instead of the hard-coded magic constant 16.
list_kasios_bp = [i for i in range(nb_kasios) if probas_kasios[i, i_bp] > 0.1]
print("--------------3 MOST PROBABLE SPECIES BY KASIOS RECORD------------------------")
print("******************************************************************************")
for j in range(nb_kasios):
    # Top-3 (probability, species) pairs for record j.
    bestprobas = heapq.nlargest(3, zip(probas_kasios[j,:], cat_bird.categories.tolist()))
    print("Kasios record #", j+1, " : ")
    for i, bestproba in enumerate(bestprobas):
        print(" {} : {} , proba {}".format(i+1, bestproba[1], bestproba[0]))
from scipy.stats import mode
# Per-sequence species predictions for all Kasios sequences.
predict = random_forest.predict(X_kasios)
# Sequence count per record, computed once
# (value_counts() was re-evaluated twice per loop iteration).
counts = kasios_id.value_counts()
i = 0
predict_kasios = []
for j in range(nb_kasios):
    # Majority vote over the sequences of record j.
    predict_kasios.append(mode(np.array([predict[i+k] for k in range(counts[j])]), axis=None))
    i += counts[j]
# mode() returns (mode, count); keep the modal class id as an int per record.
predict_kasios = np.array(predict_kasios).T[0][0].astype(int)
def plot_comparaison_predict(prediction):
    """Plot, for each Kasios record, three feature panels side by side:
    the Blue Pipit reference signature, the record itself, and the species
    predicted for it.

    prediction -- array of predicted species indices, one per Kasios record.

    Relies on the module-level globals computed above: l_kasios_id,
    features1/2/3 and features1k/2k/3k, freqs, i_bp, cat_bird, list_kasios_bp.
    """
    fig, axs = plt.subplots(len(l_kasios_id), 9, figsize=(18,40))
    plt.subplots_adjust(hspace = 0.5)
    # l_kasios_id values come from a groupby index, hence the int(i) casts below.
    for i, H1, H2, H3 in zip(l_kasios_id, features1k, features2k, features3k):
        title_bp = ""
        if i in list_kasios_bp:
            title_bp = ", may be a Blue pipit "
        # Hide all nine axis frames of this row before drawing.
        for j in range(9):
            axs[int(i),j].axis('off')
        # Columns 0-2: reference Blue Pipit signature.
        plot3_features(cat_bird.categories.tolist()[i_bp], features1[i_bp], features2[i_bp], features3[i_bp], freqs, axs[int(i),0:3])
        # Columns 3-5: the Kasios record itself.
        plot3_features("K#" + str(int(i + 1)) + title_bp, H1, H2, H3, freqs, axs[int(i),3:6])
        # Columns 6-8: the species predicted for this record.
        plot3_features("predicted : K#" + str(int(i + 1)) + "=" + cat_bird.categories.tolist()[prediction[int(i)]], features1[prediction[int(i)]],
                       features2[prediction[int(i)]], features3[prediction[int(i)]], freqs, axs[int(i),6:9])
# Render the comparison for the random-forest majority-vote predictions.
plot_comparaison_predict(predict_kasios)
from sklearn import manifold
# 2-D t-SNE embedding of the full 10100-feature dataset, colored by species.
tsne = manifold.TSNE(n_components=2, init='random',
                     random_state=0, perplexity=100)
Y = tsne.fit_transform(X)
fig = plt.figure(figsize=(18, 18))
plt.scatter(Y[:, 0], Y[:, 1], marker='.', c=y, cmap='tab20', label=y)
plt.title('t-SNE projection of the bird-call features')  # was placeholder 'Titre'
#plt.legend(loc=2, scatterpoints=1)
plt.show()
import umap
# 2-D UMAP embedding of the same features, for comparison with the t-SNE view.
embedding = umap.UMAP(n_neighbors=25,
                      min_dist=0.3,
                      metric='correlation').fit_transform(X)
fig = plt.figure(figsize=(18, 18))
plt.scatter(embedding[:, 0], embedding[:, 1], marker='.', c=y, cmap='tab20', label=y)
plt.title('UMAP projection of the bird-call features')  # was placeholder 'Titre'
#plt.legend(loc=2, scatterpoints=1)
plt.show()
# Map of all record locations, highlighting Kasios records flagged as Blue Pipit.
plotmap(list_kasios_bp)